#### "How do past repression and indoctrination affect redistributive preferences?" ####
# authors: "Pelke, Lars"
# date: 2021-03-23
# written under "R version 3.6.0 (2019-03-11)"

#### Preliminaries ####

# Show which R version is running (the script was written under R 3.6.0).
R.version[["version.string"]]

# Clear the workspace: removes every object visible via ls() in the
# environment this script is evaluated in. Loaded packages and options
# are not affected by this call.
rm(list = ls())

# set working directory

# loading packages

library(countrycode)
library(tidyverse)
library(viridis)
library(readstata13)
library(imputeTS)
library(scales)
library(ggpubr)


#### Import Data ####

# Read the V-Dem v10 country-year file and derive two extra identifiers
# from the country names: ISO numeric codes and COW numeric codes.
vdem <- readRDS("data/vdem_10/Country_Year_V-Dem_Full+others_R_v10/V-Dem-CY-Full+Others-v10.rds")
vdem$iso3n <- countrycode(vdem$country_name,
                          origin = "country.name",
                          destination = "iso3n",
                          warn = TRUE)
vdem$cowcode <- countrycode(vdem$country_name,
                            origin = "country.name",
                            destination = "cown",
                            warn = TRUE)

# Read the Fariss et al. file and keep only the join keys
# (country_id, year) plus the covariates carried forward. country_name is
# not kept, so it does not clash with V-Dem's own country_name in the join.
fariss_data <- read.dta13("data/Fariss et al/data_v3.dta") %>%
  dplyr::select(country_id, year, duration, cumulative_duration, coup,
                s_far_Maddison_gdppc_1990_estim,
                s_far_Maddison_gdp_1990_estimat,
                s_far_Maddison_pop_estimate, GDPgrowth)

# Attach the covariates to V-Dem, then keep the first row per
# country_id-year in case the join duplicated any rows.
vdem <- vdem %>%
  left_join(fariss_data, by = c("country_id", "year")) %>%
  distinct(country_id, year, .keep_all = TRUE)


#### Preparing VDem data ####
# Reduce the merged data to the columns used below (column order is kept
# as listed) and add a binary autocracy indicator: 1 when v2x_regime is
# below 2, 0 otherwise, NA when v2x_regime is missing.
vdem <- vdem %>%
  dplyr::select(
    # identifiers
    country_name, country_id, year, country_text_id, COWcode,
    # V-Dem indices and indicators
    v2x_polyarchy, v2x_regime,
    v2pepwrses, v2pepwrsoc, v2pepwrgen, v2peedueq, v2pehealth,
    v2peapsecon, v2dlencmps, v2x_clphy, v2peapspol, v2peapsgeo,
    # V-Dem "e_" background variables and the derived ISO code
    e_migdpgro, e_migdppcln, e_mipopula, e_regiongeo, iso3n,
    # v2exl_legitideol* variables
    v2exl_legitideolcr_0, v2exl_legitideolcr_1, v2exl_legitideolcr_2,
    v2exl_legitideolcr_3, v2exl_legitideolcr_4,
    v2exl_legitideol, v2exl_legitideol_ord, v2exl_legitideol_osp,
    # Fariss et al. covariates
    s_far_Maddison_gdppc_1990_estim, s_far_Maddison_gdp_1990_estimat,
    s_far_Maddison_pop_estimate, GDPgrowth,
    # derived COW code
    cowcode
  ) %>%
  mutate(autocracy = ifelse(v2x_regime < 2, 1, 0))

#### Merge GDP Data for 2017 and 2018 in the dataset ####

# Read the Maddison Project 2020 file, derive ISO numeric codes from the
# country names and keep only the join keys plus gdppc and pop.
maddison2020 <- read.dta13("data/Maddison 2020 Data/mpd2020.dta")
maddison2020$iso3n <- countrycode(maddison2020$country,
                                  origin = "country.name",
                                  destination = "iso3n",
                                  warn = TRUE)
maddison2020 <- dplyr::select(maddison2020, iso3n, year, gdppc, pop)

# Join on iso3n-year, keep one row per country_id-year, and then replace
# V-Dem's e_migdppcln for ALL years by the base-10 log of Maddison's gdppc
# (the new column reuses the old name).
# NOTE(review): the variable name suggests a natural log, whereas log10 is
# applied here - confirm this is intended.
vdem <- vdem %>%
  left_join(maddison2020, by = c("iso3n", "year")) %>%
  distinct(country_id, year, .keep_all = TRUE) %>%
  dplyr::select(-e_migdppcln) %>%
  rename(e_migdppcln = gdppc) %>%
  mutate(e_migdppcln = log10(e_migdppcln))

#### Merge EPR data into V-DEM and Merge Inequality (GINI) Data into V-DEM ####

## EPR Data ##

# Read the EPR group-period file; as.is = TRUE keeps strings as character.
eprlong <- read.csv("data/epr/EPR-2018.1.1.csv", as.is = TRUE) %>%
  rename(country = statename) %>%
  arrange(country)

# recode differing country names
eprlong$country[eprlong$country == "Czech Republic"] <- "Czechia"
eprlong$country[eprlong$country == "Republic of Korea"] <- "South Korea"
eprlong$country[eprlong$country == "Serbia" |
                  eprlong$country == "Serbia and Montenegro"] <- "Yugoslavia/Serbia"

# Expand each group-period row (from..to) into one row per year, flag
# groups whose status is DISCRIMINATED, POWERLESS or SELF-EXCLUSION, and
# sum the size of flagged groups per country-year (size_discrim).
epr <- eprlong %>%
  rowwise() %>%
  do(data.frame(country = .$country,
                from = .$from,
                to = .$to,
                year = seq(.$from, .$to, by = 1),
                group = .$group,
                gwgroupid = .$gwgroupid,
                umbrella = .$umbrella,
                size = .$size,
                status = .$status)) %>%
  arrange(country, year) %>%
  mutate(exclusion = if_else(status == "DISCRIMINATED" |
                               status == "POWERLESS" |
                               status == "SELF-EXCLUSION", 1, 0)) %>%
  group_by(country, year) %>%
  summarise(size_discrim = sum(size[exclusion == 1]))

rm(eprlong)

# Derive ISO numeric codes from the (recoded) EPR country names.
epr$iso3n <- countrycode(epr$country, "country.name", "iso3n", warn = TRUE)

# Keep only the join keys and the aggregate before merging into V-Dem.
# NOTE(review): if two EPR country names map to the same iso3n, this join
# yields more than one row per V-Dem country-year - verify.
epr <- epr %>%
  ungroup() %>%
  dplyr::select(year, size_discrim, iso3n)

vdem <- vdem %>%
  left_join(epr, by = c("iso3n", "year"))

## Inequality Data Solt (2019) and XY () ##

# Read the SWIID file and derive ISO numeric codes from its country names.
swiid <- read_csv("data/swiid/SWIID.csv")
swiid$iso3n <- countrycode(swiid$country, "country.name", "iso3n", warn = TRUE)

# Keep the join keys and the two Gini columns.
# (Fixes the former `dplyr::elect` typo, which is not an exported dplyr
# function and stopped the script here.)
swiid <- swiid %>%
  dplyr::select(year, gini_disp, gini_mkt, iso3n)

vdem <- vdem %>%
  left_join(swiid, by = c("iso3n", "year"))

# Load the V-Dem v8 file; only its e_peginiwi column is merged into the
# main data, matched on ISO numeric code and year.
vdem8 <- read_csv("data/vdem_8/V-Dem-CY+Others-v8.csv")
vdem8$iso3n <- countrycode(vdem8$country_name,
                           origin = "country.name",
                           destination = "iso3n",
                           warn = TRUE)
vdem8 <- dplyr::select(vdem8, iso3n, year, e_peginiwi)

vdem <- left_join(vdem, vdem8, by = c("iso3n", "year"))

# Drop the auxiliary data sets that have now been merged.
rm(epr, swiid, vdem8, fariss_data)

#### Distinct for country_name and Year #####

# Keep the first row for every country_name-year combination.
vdem <- vdem %>%
  distinct(country_name, year, .keep_all = TRUE)

#### Extrapolate GDP per capita Data for 2019 ####

# Distribution of (log10) GDP per capita before any filling.
summary(vdem[["e_migdppcln"]])


# Mean that ignores missing values.
#   v: a vector.
# Returns NA when every element of v is missing (this includes a
# zero-length v); otherwise the mean of the non-missing elements.
mean.new <- function(v) {
  if (all(is.na(v))) {
    return(NA)
  }
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  mean(v, na.rm = TRUE)
}

# Diagnostic: mean of e_migdppcln in 2019 BEFORE filling.
vdem %>%
  filter(year == 2019) %>%
  summarize(mean = mean.new(e_migdppcln))

# Within each country, carry the last observed value forward into
# subsequent missing rows. fill() works in row order, so this assumes the
# rows of each country are in ascending year order - TODO confirm.
vdem <- vdem %>%
  group_by(country_id) %>%
  fill(e_migdppcln, .direction = "down") %>%
  ungroup()

# Diagnostic: the same 2019 mean AFTER filling, so that the two printed
# values are comparable (the second check previously filtered on 2018).
vdem %>%
  filter(year == 2019) %>%
  summarize(mean = mean.new(e_migdppcln))


### extrapolate Pop data 


# Put the population column on a base-10 log scale (overwrites pop).
vdem$pop <- log10(vdem$pop)

# Distribution of log10 population before any filling.
summary(vdem[["pop"]])


# Mean that ignores missing values (same definition as earlier in this
# script, repeated so this section can be run on its own).
#   v: a vector.
# Returns NA when every element of v is missing (this includes a
# zero-length v); otherwise the mean of the non-missing elements.
mean.new <- function(v) {
  if (all(is.na(v))) {
    return(NA)
  }
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  mean(v, na.rm = TRUE)
}

# Diagnostic: mean log10 population in 2019 before filling.
vdem %>%
  filter(year == 2019) %>%
  summarize(mean = mean.new(pop))

# Within each country, carry the last observed pop value forward into
# subsequent missing rows (fill() works in row order), then drop the
# grouping again.
vdem <- vdem %>%
  group_by(country_id) %>%
  fill(pop, .direction = "down") %>%
  ungroup()

# Diagnostic: the same 2019 mean after filling.
vdem %>%
  filter(year == 2019) %>%
  summarize(mean = mean.new(pop))

#####################################################################################################
#####################################################################################################

#### Change socialization periods for successor states of the Soviet Union, the Socialist Federal Republic of Yugoslavia and Czechoslovakia ####

## gather information for the USSR (saved as Russia in V-Dem) and then change country_id, country_name and iso3n for those entries ##

# Copy the rows of a predecessor state for a range of years and relabel
# them as a successor state.
#   data              : country-year data frame with the columns
#                       country_name, year, country_id and iso3n.
#   source_name       : country_name of the rows to copy.
#   target_name       : country_name written to the copies.
#   first_year,
#   last_year         : inclusive range of years to copy.
#   target_country_id : country_id written to the copies.
#   target_iso3n      : iso3n written to the copies.
# Returns the relabelled copies; every other column keeps the
# predecessor's values (this includes the other identifier columns).
clone_predecessor_rows <- function(data, source_name, target_name, first_year,
                                   last_year, target_country_id, target_iso3n) {
  data %>%
    filter(country_name == source_name,
           year >= first_year,
           year <= last_year) %>%
    mutate(country_name = target_name,
           country_id = target_country_id,
           iso3n = target_iso3n)
}

# One row per successor state, in the order in which the copies are
# appended. Slovakia now receives its own codes (country_id 201,
# iso3n 703); it previously received Slovenia's (202 / 705), which mixed
# the two countries in everything keyed on country_id or iso3n.
# NOTE(review): Belarus is copied through 1992 while the other rows taken
# from Russia end in 1991 - confirm this is intended.
successor_specs <- tribble(
  ~source_name,     ~target_name,             ~first_year, ~last_year, ~target_country_id, ~target_iso3n,
  "Russia",         "Armenia",                1936,        1991,       105,                51,
  "Russia",         "Azerbaijan",             1936,        1991,       106,                31,
  "Russia",         "Belarus",                1945,        1992,       107,                112,
  "Serbia",         "Bosnia and Herzegovina", 1945,        1992,       150,                70,
  "Russia",         "Estonia",                1944,        1991,       161,                233,
  "Russia",         "Georgia",                1936,        1991,       118,                268,
  "Russia",         "Latvia",                 1944,        1991,       84,                 428,
  "Russia",         "Lithuania",              1944,        1991,       173,                440,
  "Russia",         "Moldova",                1944,        1991,       126,                498,
  "Serbia",         "North Macedonia",        1912,        1992,       176,                807,
  "Czech Republic", "Slovakia",               1945,        1992,       201,                703,
  "Serbia",         "Slovenia",               1945,        1992,       202,                705,
  "Russia",         "Ukraine",                1944,        1991,       100,                804
)

# Build all copies from the data as it is BEFORE any copy is appended.
successor_rows <- pmap(successor_specs, clone_predecessor_rows, data = vdem)

vdem <- bind_rows(vdem, successor_rows)

# Sort by year. arrange() ignores the grouping, so rows are ordered by
# year across all countries; the country_name grouping stays attached.
vdem <- vdem %>%
  group_by(country_name) %>%
  arrange(year)

# Remove the temporary objects. (The former `rm(Czech, Serbia)` referred
# to objects that were never created and only raised warnings; the former
# "short test" subset of Slovakia was created and removed without use.)
rm(successor_specs, successor_rows)


#### Constructing years under democracy and autocracy ####

# Recompute the binary autocracy indicator on the extended data:
# 1 when v2x_regime is below 2, 0 otherwise, NA when v2x_regime is missing.
vdem <- mutate(vdem, autocracy = ifelse(v2x_regime < 2, 1, 0))

# Cumulative sum in which missing values count as zero.
#   x: a vector.
# Returns a vector of the same length as x.
cumsum.na <- function(x) {
  cumsum(replace(x, is.na(x), 0))
}


# Per country_id, count the years spent under autocracy and under
# democracy up to and including each row (rows are processed in their
# current order). The counters are running totals over the whole series of
# a country; they do not restart when the regime changes. In rows of the
# other regime type the counter is NA and then takes the last earlier
# value via fill().
vdem_auto <- vdem %>%
  group_by(country_id) %>%
  # binary democracy measure: complement of autocracy
  mutate(democracy = ifelse(autocracy == 1, 0, 1)) %>%
  mutate(autocracy_years = ifelse(autocracy == 1, cumsum.na(autocracy), NA),
         democracy_years = ifelse(democracy == 1, cumsum.na(democracy), NA)) %>%
  fill(c(autocracy_years, democracy_years)) %>%
  ungroup() %>%
  # counters still missing after the fill are set to 0
  mutate(autocracy_years = replace(autocracy_years, is.na(autocracy_years), 0),
         democracy_years = replace(democracy_years, is.na(democracy_years), 0)) %>%
  # counters are blanked for years before 1900; the mask uses this data
  # frame's own year column instead of indexing with vdem$year, which was
  # only correct while both frames had identical row order
  mutate(autocracy_years = replace(autocracy_years, year < 1900, NA),
         democracy_years = replace(democracy_years, year < 1900, NA)) %>%
  dplyr::select(country_id, year, autocracy_years, democracy_years)

# NOTE(review): if a country_id-year pair occurs more than once in vdem
# (for example where appended predecessor rows overlap years already in
# the data), the counters double count those years and this join
# multiplies the affected rows - verify that the keys are unique.
vdem <- vdem %>%
  left_join(vdem_auto, by = c("country_id", "year"))

rm(vdem_auto)

#### Constructing Cohort Means for VDem ####

# Minimum that ignores missing values.
#   v: a vector.
# Returns NA when every element of v is missing (this includes a
# zero-length v); otherwise the minimum of the non-missing elements.
min.new <- function(v) {
  if (all(is.na(v))) {
    return(NA)
  }
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  min(v, na.rm = TRUE)
}

# Maximum that ignores missing values.
#   v: a vector.
# Returns NA when every element of v is missing (this includes a
# zero-length v); otherwise the maximum of the non-missing elements.
max.new <- function(v) {
  if (all(is.na(v))) {
    return(NA)
  }
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  max(v, na.rm = TRUE)
}

# Drop the grouping still attached to vdem before building cohorts.
vdem <- vdem %>% ungroup()

# Left-closed 5-year intervals [1885, 1890), ..., [2015, 2020); each
# interval is labelled by its first year. Years outside 1885-2019 get NA.
cohort_breaks <- seq(1885, 2020, by = 5)

# Aggregate the country-year data to country x 5-year interval.
# mean.new / min.new / max.new ignore missing values and return NA when
# all values in a cell are missing.
vdem_cohort <- vdem %>%
  mutate(cohort_5 = cut(year,
                        breaks = cohort_breaks,
                        right = FALSE,
                        labels = head(cohort_breaks, -1))) %>%
  group_by(country_id, cohort_5) %>%
  summarise(v2x_polyarchy_5 = mean.new(v2x_polyarchy),
            v2pepwrses_5 = mean.new(v2pepwrses),
            v2pepwrsoc_5 = mean.new(v2pepwrsoc),
            v2x_clphy_5 = mean.new(v2x_clphy),
            v2exl_legitideol_5 = mean.new(v2exl_legitideol),
            v2pepwrgen_5 = mean.new(v2pepwrgen),
            v2peedueq_5 = mean.new(v2peedueq),
            v2pehealth_5 = mean.new(v2pehealth),
            v2peapsecon_5 = mean.new(v2peapsecon),
            v2dlencmps_5 = mean.new(v2dlencmps),
            e_migdpgro_5 = mean.new(e_migdpgro),
            e_migdppcln_5 = mean.new(e_migdppcln),
            e_mipopula_5 = mean.new(e_mipopula),
            # lowest regime score within the interval
            v2x_regime_5 = min.new(v2x_regime),
            # 1 if any year in the interval is coded as autocracy
            autocracy_5 = max.new(autocracy),
            size_discrim_5 = max.new(size_discrim),
            gini_disp_5 = mean.new(gini_disp),
            gini_mkt_5 = mean.new(gini_mkt),
            e_peginiwi_5 = mean.new(e_peginiwi),
            # maximum of each v2exl_legitideolcr_* column within the interval
            nationalist_5 = max.new(v2exl_legitideolcr_0),
            communist_5 = max.new(v2exl_legitideolcr_1),
            conservativ_5 = max.new(v2exl_legitideolcr_2),
            separatist_5 = max.new(v2exl_legitideolcr_3),
            religious_5 = max.new(v2exl_legitideolcr_4),
            s_far_Maddison_gdppc_1990_estim_5 = mean.new(s_far_Maddison_gdppc_1990_estim),
            s_far_Maddison_gdp_1990_estimat_5 = mean.new(s_far_Maddison_gdp_1990_estimat),
            s_far_Maddison_pop_estimate_5 = mean.new(s_far_Maddison_pop_estimate),
            GDPgrowth_5 = mean.new(GDPgrowth)
            )

# Numeric copy of the interval label (its first year).
vdem_cohort$cohortmatch5_15 <- as.numeric(as.character(vdem_cohort$cohort_5))

rm(cohort_breaks)

# Store the country-year data and the cohort-level aggregates.
saveRDS(vdem, file = "data/vdem_data.rds")
saveRDS(vdem_cohort, file = "data/vdem_cohort.rds")

#### Figure 1 Paper ####

# Data for the two panels: Poland and Egypt from 1950 onwards.
vdem_poland <- vdem %>%
  filter(country_name == "Poland", year > 1949)

vdem_egypt <- vdem %>%
  filter(country_name == "Egypt", year > 1949)

# Map v2exl_legitideol_osp from the interval [0, 4] onto [0, 1] so that it
# can share the y axis with v2x_clphy.
vdem_poland$v2exl_legitideol_osp_res <- rescale(vdem_poland$v2exl_legitideol_osp,
                                                to = c(0, 1), from = c(0, 4))
vdem_egypt$v2exl_legitideol_osp_res <- rescale(vdem_egypt$v2exl_legitideol_osp,
                                               to = c(0, 1), from = c(0, 4))

# Text labels are drawn with annotate(): geom_text() with constant
# aesthetics draws one copy of the label for every row of the data.

# Panel 1: Poland, with an additional marker at 1989.
F1 <- ggplot(vdem_poland, aes(x = year)) +
  geom_line(aes(y = v2exl_legitideol_osp_res), color = "red") +
  geom_line(aes(y = v2x_clphy), color = "blue") +
  theme_pubr() +
  geom_vline(xintercept = 1960, colour = "grey") +
  annotate("text", x = 1960, y = 0.1, label = "\nrespondent born",
           colour = "grey", angle = 90) +
  geom_vline(xintercept = 1975, colour = "black", linetype = "dashed") +
  annotate("text", x = 1975, y = 0.15, label = "\nrespondent turned 15",
           colour = "black", angle = 90) +
  geom_vline(xintercept = 1989, colour = "grey", linetype = "dashed") +
  annotate("text", x = 1989, y = 0.6, label = "\nCollapse of Communist rule",
           colour = "grey", angle = 90) +
  geom_bracket(
    xmin = 1975, xmax = 1989, y.position = 0.9,
    label = "Exposure\n to autocracy"
  ) +
  ylim(0, 1) +
  ggtitle("Poland") +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(x = "Year",
       y = "Indoctrination (red)/\n Repression (blue)")

# Panel 2: Egypt; the exposure bracket runs to the end of the x range.
F2 <- ggplot(vdem_egypt, aes(x = year)) +
  geom_line(aes(y = v2exl_legitideol_osp_res), color = "red") +
  geom_line(aes(y = v2x_clphy), color = "blue") +
  theme_pubr() +
  geom_vline(xintercept = 1960, colour = "grey") +
  annotate("text", x = 1960, y = 0.1, label = "\nrespondent born",
           colour = "grey", angle = 90) +
  geom_vline(xintercept = 1975, colour = "black", linetype = "dashed") +
  annotate("text", x = 1975, y = 0.6, label = "\nrespondent turned 15",
           colour = "black", angle = 90) +
  geom_bracket(
    xmin = 1975, xmax = 2020, y.position = 0.9,
    label = "Exposure\n to autocracy"
  ) +
  ylim(0, 1) +
  ggtitle("Egypt") +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(x = "Year",
       y = "Indoctrination (red)/\n Repression (blue)")

# Combine both panels, display the result, and save exactly that object
# instead of relying on whatever ggplot2 currently records as last plot.
intro_figure <- ggarrange(F1, F2)
intro_figure

ggsave("Output/Intro1.pdf", plot = intro_figure,
       height = 15, width = 30, units = "cm")


  

